## vector with package names
x <- c("pbapply", "viridis", "knitr", "kableExtra", "ggplot2", "ape", "PhenotypeSpace", "lmerTest", "brms")

aa <- lapply(x, function(y) {
  
  # check if installed, if not then install 
  if (!y %in% installed.packages()[,"Package"]) 
    install.packages(y) 

  # load package
  try(require(y, character.only = T), silent = T)
})
knitr::opts_knit$set(root.dir = normalizePath(".."))

knitr::opts_chunk$set(dpi = 38, fig.width = 18, fig.height = 10, echo = TRUE) 

options(knitr.kable.NA = '')

source('~/Dropbox/Projects/lbh_cultural_evolution_revBayes/source/custom_treespace.R')

theme_set(theme_classic(base_size = 22))
rb.trees <- readRDS("./output/revbayes_output_in_single_R_object.RDS")

rb.trees <- rb.trees[grep("run_", names(rb.trees), invert = TRUE, value = TRUE)]

trees_diags <- data.frame(model = names(rb.trees))

# get lek name
trees_diags$lek <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 1)

# substitution model
trees_diags$subs <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 2)

# period
trees_diags$period <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 3)

# and which fossils were used
trees_diags$fossils <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 4)

trees_diags$align <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 5)

trees_diags$n_trees <- sapply(rb.trees, function(x) length(x$trees))

# Number of models by iterations and lek
kbl <- knitr::kable(as.matrix(table(trees_diags$n_trees, trees_diags$lek)), caption = "Number of models with a specific number of trees by lek")

kableExtra::kable_styling(kbl)

Tree spaces by leks

All fossils using pairwise shared tips

  • 100 trees for each model evenly spaced along the chain
  • Some trees were removed if NAs were produced when comparing topologies (i.e. non-comparable topologies)
#selected leks
sel_leks <- c("SUR", "CCE", "HC1", "BR1", "TR1")

tree_names <- grep("early", names(rb.trees), invert = TRUE, value = TRUE)

pnts_lks <- lapply(sel_leks, function(i) {
  
  print(i)
  lek_trees <- grep(i, tree_names, value = TRUE)
  
  multi_list <- lapply(lek_trees, function(x) read.tree(file.path("./output/most_recent_revbayes_models/", x), keep.multi = TRUE))
  
  names(multi_list) <- sapply(basename(lek_trees), function(x) paste(strsplit(x, "_", fixed = TRUE)[[1]][1:5], collapse = "_"))
  
  pnts <- try(custom_treespace(multi_list, n.points = 100, method = "RF", cl = 10), silent = TRUE)

  if (!is(pnts, "try-error")) 
    return(pnts) else
      return(NULL)
})

names(pnts_lks) <- sel_leks

sapply(pnts_lks, is.null)

saveRDS(pnts_lks, file = "./output/topology/tree_distance_all_fossils_pairwise_shared.RDS")

New and old data sets

pnts_lks <- readRDS("./output/topology/tree_distance_all_fossils_pairwise_shared.RDS")

ggs <- lapply(pnts_lks, function(X) {

    ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") + 
      ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain)))) 
    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

Old data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  
  if (nrow(X) > 0)
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1
## NULL
## 
## $TR1
## NULL

New data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

Summary boxplots

Values were mean-centered within lek

out <- lapply(pnts_lks, function(x){
  
  Y <- space_size(X = x, dimensions = c("x", "y"), group = "chain", pb = FALSE)
  Y$mean.c.size <- Y$size - mean(Y$size)
  # Y$size <- Y$size / max(Y$size)
 
  return(Y)  
})

topo_size_all <- do.call(rbind, out)

topo_size_all$alignment <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_size_all$data.set <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_size_all$fossils <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_size_all$models <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])

topo_size_all$lek <- substr(rownames(topo_size_all), 0 , 3)

ggplot(topo_size_all, aes(x = alignment, y = mean.c.size)) + 
  geom_boxplot(fill = viridis(10, alpha = 0.2)[4]) +
  theme(axis.text.x = element_text(size=14, angle=45, hjust = 1, vjust = 1)) + 
  labs(x = "Alignment", y = "Topologic space size") +
  facet_wrap(~ data.set + models)

# similarity

out <- lapply(pnts_lks, function(x){
  
  Y <- space_similarity(X = x, dimensions = c("x", "y"), group = "chain", pb = FALSE, type = "proportional.overlap")
  
  Y$mean.c.overlap <- Y$overlap -  mean(Y$overlap)
  # Y$overlap <- Y$overlap / max(Y$overlap)
  
  return(Y)  
})

topo_similarity_all <- do.call(rbind, out)

topo_similarity_all2 <- topo_similarity_all
names(topo_similarity_all2) <- c("group.2", "group.1", "mean.c.overlap", "overlap")

topo_similarity_all <- rbind(topo_similarity_all, topo_similarity_all2)[, c( "group.1", "mean.c.overlap", "overlap")]

topo_similarity_all$alignment <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_similarity_all$data.set <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_similarity_all$fossils <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_similarity_all$models <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])


topo_similarity_all$lek <- substr(rownames(topo_similarity_all), 0 , 3)


ggplot(topo_similarity_all, aes(x = alignment, y = mean.c.overlap)) + 
  geom_boxplot(fill = viridis(10, alpha = 0.2)[4]) +
  theme(axis.text.x = element_text(size=14, angle=45, hjust = 1, vjust = 1)) + 
  labs(x = "Alignment", y = "Topologic space similarity") +
  facet_wrap(~ data.set + models)

Clumping molecular clocks

Old data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
    if (nrow(X) > 0)
    ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1
## NULL
## 
## $TR1
## NULL

New data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
  ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

Early fossils using only pairwise shared tips

#selected leks
sel_leks <- c("SUR", "CCE", "HC1", "BR1", "TR1")
# sel_leks <- c("SUR", "TR1")

tree_names <- grep("early", names(rb.trees), value = TRUE)

pnts_lks <- lapply(sel_leks, function(i) {
  
  print(i)
  lek_trees <- grep(i, tree_names, value = TRUE)
  
  multi_list <- lapply(lek_trees, function(x) read.tree(file.path("./output/most_recent_revbayes_models/", x), keep.multi = TRUE))
  
  names(multi_list) <- sapply(basename(lek_trees), function(x) paste(strsplit(x, "_", fixed = TRUE)[[1]][1:5], collapse = "_"))
  
  pnts <- try(custom_treespace(multi_list, n.points = 100, method = "RF", cl = 10), silent = TRUE)

  if (!is(pnts, "try-error")) 
    return(pnts) else
      return(NULL)
})

names(pnts_lks) <- sel_leks

sapply(pnts_lks, is.null)

saveRDS(pnts_lks, file = "./output/topology/tree_distance_early_fossils_pairwise_shared.RDS")

New and old data sets

pnts_lks <- readRDS("./output/topology/tree_distance_early_fossils_pairwise_shared.RDS")

ggs <- lapply(pnts_lks, function(X) {

    ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") + 
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain)))) 

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

New data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
  })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

Old data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  
  if (nrow(X) > 0)
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
  })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1
## NULL
## 
## $TR1
## NULL

Summary boxplots

Values were mean-centered within lek

out <- lapply(pnts_lks, function(x){
  
  Y <- space_size(X = x, dimensions = c("x", "y"), group = "chain", pb = FALSE)
  Y$mean.c.size <- Y$size - mean(Y$size)
  # Y$size <- Y$size / max(Y$size)
  
  return(Y)  
})

topo_size_early <- do.call(rbind, out)

topo_size_early$alignment <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_size_early$data.set <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_size_early$fossils <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_size_early$models <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])

topo_size_early$lek <- substr(rownames(topo_size_early), 0 , 3)

ggplot(topo_size_early, aes(x = alignment, y = mean.c.size)) + 
  geom_boxplot(fill = viridis(10, alpha = 0.2)[4]) +
  theme(axis.text.x = element_text(size=14, angle=45, hjust = 1, vjust = 1)) + 
  labs(x = "Alignment", y = "Topologic space size") +
  facet_wrap(~ data.set + models)

# similarity

out <- lapply(pnts_lks, function(x){
  
  Y <- space_similarity(X = x, dimensions = c("x", "y"), group = "chain", pb = FALSE, type = "proportional.overlap")
  Y$mean.c.overlap <- Y$overlap - mean(Y$overlap)
  # Y$overlap <- Y$overlap / max(Y$overlap)
  
  return(Y)  
})

topo_similarity_early <- do.call(rbind, out)

topo_similarity_early2 <- topo_similarity_early
names(topo_similarity_early2) <- c("group.2", "group.1", "mean.c.overlap", "overlap")

topo_similarity_early <- rbind(topo_similarity_early, topo_similarity_early2)[, c( "group.1",  "mean.c.overlap", "overlap")]

topo_similarity_early$alignment <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_similarity_early$data.set <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_similarity_early$fossils <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_similarity_early$models <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])

topo_similarity_early$lek <- substr(rownames(topo_similarity_early), 0 , 3)

ggplot(topo_similarity_early, aes(x = alignment, y = mean.c.overlap)) + 
  geom_boxplot(fill = viridis(10, alpha = 0.2)[4]) +
  theme(axis.text.x = element_text(size=14, angle=45, hjust = 1, vjust = 1)) + 
  labs(x = "Alignment", y = "Topologic space similarity") +
  facet_wrap(~ data.set + models)

Clumping molecular clocks

Old data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
    if (nrow(X) > 0)
    ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1
## NULL
## 
## $TR1
## NULL

New data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
  ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

Stats

Size of the topological space

# combine data sets
topo_size_all$fossils <- "all"
topo_size_early$fossils <- "early"
topo_size <- rbind(topo_size_all, topo_size_early)

iterations <- 5000

priors <- c(set_prior("normal(0, 1.5)", class = "Intercept"), set_prior("normal(0, 1.5)", class = "b"))

size_mod <- brm(size ~ alignment + data.set + models + fossils, data = topo_size, prior = priors, iter = iterations, chains = 3, silent = 2, seed = 5)


priors <- c(set_prior("normal(0, 1.5)", class = "b"))

# without intercept 
size_mod_no_intercept <- brm(size ~ alignment + data.set + models + fossils -1, data = topo_size, prior = priors, iter = iterations, chains = 3, silent = 2, seed = 5)

saveRDS(list(size_mod = size_mod, size_mod_no_intercept = size_mod_no_intercept), "./data/processed/regression_results_topological_size.RDS")

Intercept-included model:

size_mods <- readRDS("./data/processed/regression_results_topological_size.RDS")

size_mods$size_mod
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: size ~ alignment + data.set + models + fossils 
##    Data: topo_size (Number of observations: 96) 
##   Draws: 3 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 7500
## 
## Population-Level Effects: 
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept            0.32      0.08     0.16     0.49 1.00    11144     5436
## alignmentoptimal    -0.00      0.09    -0.18     0.17 1.00     9896     5775
## alignmentprank      -0.12      0.09    -0.30     0.05 1.00     9964     6014
## data.setold          0.35      0.07     0.21     0.49 1.00    12043     5663
## modelsUexp           0.05      0.07    -0.08     0.19 1.00    10779     5259
## fossilsearly        -0.00      0.07    -0.14     0.14 1.00    12679     5083
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.34      0.03     0.30     0.40 1.00    10381     5511
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Contrasts for alignment strategies:

aligment_contrasts <- c(prnk_vs_agnostic = "alignmentprank - alignmentall.equal = 0", prnk_vs_optimal = "alignmentprank - alignmentoptimal = 0", optimal_vs_agnostic = "alignmentoptimal - alignmentall.equal = 0")

hypothesis(size_mods$size_mod_no_intercept, aligment_contrasts)
## Hypothesis Tests for class b:
##            Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1    prnk_vs_agnostic    -0.12      0.09    -0.29     0.05         NA        NA
## 2     prnk_vs_optimal    -0.12      0.09    -0.29     0.05         NA        NA
## 3 optimal_vs_agnostic     0.00      0.09    -0.17     0.16         NA        NA
##   Star
## 1     
## 2     
## 3     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

Similarity of the topological space

topo_similarity_all$fossils <- "all"
topo_similarity_early$fossils <- "early"

topo_similarity <- rbind(topo_similarity_all, topo_similarity_early)

similarity_mod <- brm(overlap ~ alignment + data.set + models + fossils, data = topo_similarity, prior = priors, iter = iterations, chains = 3, silent = 2, seed = 5)

# without intercept 
similarity_mod_no_intercept <- brm(overlap ~ alignment + data.set + models + fossils -1, data = topo_similarity, iter = iterations, chains = 3, silent = 2, seed = 5)


saveRDS(list(similarity_mod = similarity_mod, similarity_mod_no_intercept = similarity_mod_no_intercept), "./data/processed/regression_results_topological_similiarity.RDS")

Intercept-included model:

similarity_mods <- readRDS("./data/processed/regression_results_topological_similiarity.RDS")

similarity_mods$similarity_mod
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: overlap ~ alignment + data.set + models + fossils 
##    Data: topo_similarity (Number of observations: 912) 
##   Draws: 3 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 7500
## 
## Population-Level Effects: 
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept            0.15      0.02     0.12     0.18 1.00     9958     5540
## alignmentoptimal    -0.03      0.02    -0.07     0.00 1.00     9358     5836
## alignmentprank      -0.09      0.02    -0.13    -0.06 1.00     9072     6059
## data.setold         -0.11      0.01    -0.14    -0.08 1.00    11548     5900
## modelsUexp          -0.00      0.01    -0.03     0.03 1.00    10243     5235
## fossilsearly         0.00      0.01    -0.03     0.03 1.00    11138     5780
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.22      0.01     0.21     0.23 1.00    11619     5798
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Contrasts for alignment strategies:

hypothesis(similarity_mods$similarity_mod_no_intercept, aligment_contrasts)
## Hypothesis Tests for class b:
##            Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1    prnk_vs_agnostic    -0.09      0.02    -0.13    -0.06         NA        NA
## 2     prnk_vs_optimal    -0.06      0.02    -0.10    -0.03         NA        NA
## 3 optimal_vs_agnostic    -0.03      0.02    -0.07     0.00         NA        NA
##   Star
## 1    *
## 2    *
## 3     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

R session information

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_BR.UTF-8    
##  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_BR.UTF-8   
##  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] brms_2.16.3          Rcpp_1.0.8.3         lmerTest_3.1-3      
##  [4] lme4_1.1-27.1        Matrix_1.3-4         PhenotypeSpace_0.1.0
##  [7] warbleR_1.1.27       NatureSounds_1.0.4   seewave_2.2.0       
## [10] tuneR_1.4.0          ape_5.5              ggplot2_3.3.5       
## [13] kableExtra_1.3.1     knitr_1.39           viridis_0.6.2       
## [16] viridisLite_0.4.0    pbapply_1.5-0       
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.3.0       fastmatch_1.1-0       plyr_1.8.6           
##   [4] igraph_1.2.6          sp_1.4-5              splines_4.1.0        
##   [7] crosstalk_1.1.1       inline_0.3.19         rstantools_2.1.1     
##  [10] digest_0.6.29         htmltools_0.5.2       rsconnect_0.8.18     
##  [13] fansi_1.0.0           magrittr_2.0.3        checkmate_2.0.0      
##  [16] tensor_1.5            cluster_2.1.2         RcppParallel_5.1.4   
##  [19] matrixStats_0.61.0    xts_0.12.1            spatstat.sparse_2.0-0
##  [22] CircStats_0.2-6       prettyunits_1.1.1     colorspace_2.0-2     
##  [25] signal_0.7-7          rvest_1.0.0           xfun_0.31            
##  [28] dplyr_1.0.7           callr_3.7.0           crayon_1.5.1         
##  [31] RCurl_1.98-1.6        jsonlite_1.8.0        spatstat.data_2.1-0  
##  [34] phangorn_2.7.1        zoo_1.8-9             glue_1.6.2           
##  [37] polyclip_1.10-0       gtable_0.3.0          webshot_0.5.2        
##  [40] V8_3.4.2              distributional_0.2.2  pkgbuild_1.2.0       
##  [43] rstan_2.21.2          abind_1.4-5           scales_1.1.1         
##  [46] mvtnorm_1.1-2         DBI_1.1.1             miniUI_0.1.1.1       
##  [49] dtw_1.22-3            xtable_1.8-4          diffobj_0.3.4        
##  [52] spatstat.core_2.3-0   proxy_0.4-26          StanHeaders_2.21.0-7 
##  [55] stats4_4.1.0          DT_0.18               htmlwidgets_1.5.3    
##  [58] httr_1.4.2            threejs_0.3.3         posterior_1.1.0      
##  [61] ellipsis_0.3.2        pkgconfig_2.0.3       loo_2.4.1.9000       
##  [64] farver_2.1.0          sass_0.4.0            deldir_0.2-10        
##  [67] utf8_1.2.2            labeling_0.4.2        tidyselect_1.1.1     
##  [70] rlang_1.0.2           reshape2_1.4.4        later_1.2.0          
##  [73] munsell_0.5.0         tools_4.1.0           cli_3.1.0            
##  [76] generics_0.1.0        ade4_1.7-17           adehabitatHR_0.4.19  
##  [79] ggridges_0.5.3        evaluate_0.15         stringr_1.4.0        
##  [82] fastmap_1.1.0         yaml_2.3.5            goftest_1.2-2        
##  [85] processx_3.5.2        purrr_0.3.4           nlme_3.1-152         
##  [88] projpred_2.0.2        mime_0.11             xml2_1.3.2           
##  [91] compiler_4.1.0        bayesplot_1.8.1       shinythemes_1.2.0    
##  [94] rstudioapi_0.13       gamm4_0.2-6           curl_4.3.2           
##  [97] spatstat.utils_2.2-0  tibble_3.1.6          bslib_0.2.5.1        
## [100] stringi_1.7.6         highr_0.9             ps_1.6.0             
## [103] Brobdingnag_1.2-6     rgeos_0.5-5           lattice_0.20-44      
## [106] nloptr_1.2.2.2        markdown_1.1          shinyjs_2.0.0        
## [109] vegan_2.5-7           fftw_1.0-7            permute_0.9-5        
## [112] tensorA_0.36.2        vctrs_0.3.8           pillar_1.6.4         
## [115] lifecycle_1.0.1       spatstat.geom_2.2-2   jquerylib_0.1.4      
## [118] bridgesampling_1.1-2  bitops_1.0-7          raster_3.4-13        
## [121] httpuv_1.6.1          R6_2.5.1              promises_1.2.0.1     
## [124] gridExtra_2.3         codetools_0.2-18      boot_1.3-28          
## [127] colourpicker_1.1.0    MASS_7.3-54           gtools_3.9.2         
## [130] assertthat_0.2.1      rjson_0.2.21          withr_2.4.3          
## [133] shinystan_2.5.0       adehabitatMA_0.3.14   mgcv_1.8-36          
## [136] parallel_4.1.0        terra_1.5-12          quadprog_1.5-8       
## [139] grid_4.1.0            rpart_4.1-15          adehabitatLT_0.3.25  
## [142] coda_0.19-4           minqa_1.2.4           rmarkdown_2.13       
## [145] numDeriv_2016.8-1.1   shiny_1.6.0           base64enc_0.1-3      
## [148] dygraphs_1.1.1.6